{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "train = pd.read_csv('PPD_Training_Master_GBK_3_1_Training_Set.csv',encoding='gb18030')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[u'Idx',\n",
       " u'UserInfo_1',\n",
       " u'UserInfo_2',\n",
       " u'UserInfo_3',\n",
       " u'UserInfo_4',\n",
       " u'WeblogInfo_1',\n",
       " u'WeblogInfo_2',\n",
       " u'WeblogInfo_3',\n",
       " u'WeblogInfo_4',\n",
       " u'WeblogInfo_5',\n",
       " u'WeblogInfo_6',\n",
       " u'WeblogInfo_7',\n",
       " u'WeblogInfo_8',\n",
       " u'WeblogInfo_9',\n",
       " u'WeblogInfo_10',\n",
       " u'WeblogInfo_11',\n",
       " u'WeblogInfo_12',\n",
       " u'WeblogInfo_13',\n",
       " u'WeblogInfo_14',\n",
       " u'WeblogInfo_15',\n",
       " u'WeblogInfo_16',\n",
       " u'WeblogInfo_17',\n",
       " u'WeblogInfo_18',\n",
       " u'UserInfo_5',\n",
       " u'UserInfo_6',\n",
       " u'UserInfo_7',\n",
       " u'UserInfo_8',\n",
       " u'UserInfo_9',\n",
       " u'UserInfo_10',\n",
       " u'UserInfo_11',\n",
       " u'UserInfo_12',\n",
       " u'UserInfo_13',\n",
       " u'UserInfo_14',\n",
       " u'UserInfo_15',\n",
       " u'UserInfo_16',\n",
       " u'UserInfo_17',\n",
       " u'UserInfo_18',\n",
       " u'UserInfo_19',\n",
       " u'UserInfo_20',\n",
       " u'UserInfo_21',\n",
       " u'UserInfo_22',\n",
       " u'UserInfo_23',\n",
       " u'UserInfo_24',\n",
       " u'Education_Info1',\n",
       " u'Education_Info2',\n",
       " u'Education_Info3',\n",
       " u'Education_Info4',\n",
       " u'Education_Info5',\n",
       " u'Education_Info6',\n",
       " u'Education_Info7',\n",
       " u'Education_Info8',\n",
       " u'WeblogInfo_19',\n",
       " u'WeblogInfo_20',\n",
       " u'WeblogInfo_21',\n",
       " u'WeblogInfo_23',\n",
       " u'WeblogInfo_24',\n",
       " u'WeblogInfo_25',\n",
       " u'WeblogInfo_26',\n",
       " u'WeblogInfo_27',\n",
       " u'WeblogInfo_28',\n",
       " u'WeblogInfo_29',\n",
       " u'WeblogInfo_30',\n",
       " u'WeblogInfo_31',\n",
       " u'WeblogInfo_32',\n",
       " u'WeblogInfo_33',\n",
       " u'WeblogInfo_34',\n",
       " u'WeblogInfo_35',\n",
       " u'WeblogInfo_36',\n",
       " u'WeblogInfo_37',\n",
       " u'WeblogInfo_38',\n",
       " u'WeblogInfo_39',\n",
       " u'WeblogInfo_40',\n",
       " u'WeblogInfo_41',\n",
       " u'WeblogInfo_42',\n",
       " u'WeblogInfo_43',\n",
       " u'WeblogInfo_44',\n",
       " u'WeblogInfo_45',\n",
       " u'WeblogInfo_46',\n",
       " u'WeblogInfo_47',\n",
       " u'WeblogInfo_48',\n",
       " u'WeblogInfo_49',\n",
       " u'WeblogInfo_50',\n",
       " u'WeblogInfo_51',\n",
       " u'WeblogInfo_52',\n",
       " u'WeblogInfo_53',\n",
       " u'WeblogInfo_54',\n",
       " u'WeblogInfo_55',\n",
       " u'WeblogInfo_56',\n",
       " u'WeblogInfo_57',\n",
       " u'WeblogInfo_58',\n",
       " u'ThirdParty_Info_Period1_1',\n",
       " u'ThirdParty_Info_Period1_2',\n",
       " u'ThirdParty_Info_Period1_3',\n",
       " u'ThirdParty_Info_Period1_4',\n",
       " u'ThirdParty_Info_Period1_5',\n",
       " u'ThirdParty_Info_Period1_6',\n",
       " u'ThirdParty_Info_Period1_7',\n",
       " u'ThirdParty_Info_Period1_8',\n",
       " u'ThirdParty_Info_Period1_9',\n",
       " u'ThirdParty_Info_Period1_10',\n",
       " u'ThirdParty_Info_Period1_11',\n",
       " u'ThirdParty_Info_Period1_12',\n",
       " u'ThirdParty_Info_Period1_13',\n",
       " u'ThirdParty_Info_Period1_14',\n",
       " u'ThirdParty_Info_Period1_15',\n",
       " u'ThirdParty_Info_Period1_16',\n",
       " u'ThirdParty_Info_Period1_17',\n",
       " u'ThirdParty_Info_Period2_1',\n",
       " u'ThirdParty_Info_Period2_2',\n",
       " u'ThirdParty_Info_Period2_3',\n",
       " u'ThirdParty_Info_Period2_4',\n",
       " u'ThirdParty_Info_Period2_5',\n",
       " u'ThirdParty_Info_Period2_6',\n",
       " u'ThirdParty_Info_Period2_7',\n",
       " u'ThirdParty_Info_Period2_8',\n",
       " u'ThirdParty_Info_Period2_9',\n",
       " u'ThirdParty_Info_Period2_10',\n",
       " u'ThirdParty_Info_Period2_11',\n",
       " u'ThirdParty_Info_Period2_12',\n",
       " u'ThirdParty_Info_Period2_13',\n",
       " u'ThirdParty_Info_Period2_14',\n",
       " u'ThirdParty_Info_Period2_15',\n",
       " u'ThirdParty_Info_Period2_16',\n",
       " u'ThirdParty_Info_Period2_17',\n",
       " u'ThirdParty_Info_Period3_1',\n",
       " u'ThirdParty_Info_Period3_2',\n",
       " u'ThirdParty_Info_Period3_3',\n",
       " u'ThirdParty_Info_Period3_4',\n",
       " u'ThirdParty_Info_Period3_5',\n",
       " u'ThirdParty_Info_Period3_6',\n",
       " u'ThirdParty_Info_Period3_7',\n",
       " u'ThirdParty_Info_Period3_8',\n",
       " u'ThirdParty_Info_Period3_9',\n",
       " u'ThirdParty_Info_Period3_10',\n",
       " u'ThirdParty_Info_Period3_11',\n",
       " u'ThirdParty_Info_Period3_12',\n",
       " u'ThirdParty_Info_Period3_13',\n",
       " u'ThirdParty_Info_Period3_14',\n",
       " u'ThirdParty_Info_Period3_15',\n",
       " u'ThirdParty_Info_Period3_16',\n",
       " u'ThirdParty_Info_Period3_17',\n",
       " u'ThirdParty_Info_Period4_1',\n",
       " u'ThirdParty_Info_Period4_2',\n",
       " u'ThirdParty_Info_Period4_3',\n",
       " u'ThirdParty_Info_Period4_4',\n",
       " u'ThirdParty_Info_Period4_5',\n",
       " u'ThirdParty_Info_Period4_6',\n",
       " u'ThirdParty_Info_Period4_7',\n",
       " u'ThirdParty_Info_Period4_8',\n",
       " u'ThirdParty_Info_Period4_9',\n",
       " u'ThirdParty_Info_Period4_10',\n",
       " u'ThirdParty_Info_Period4_11',\n",
       " u'ThirdParty_Info_Period4_12',\n",
       " u'ThirdParty_Info_Period4_13',\n",
       " u'ThirdParty_Info_Period4_14',\n",
       " u'ThirdParty_Info_Period4_15',\n",
       " u'ThirdParty_Info_Period4_16',\n",
       " u'ThirdParty_Info_Period4_17',\n",
       " u'ThirdParty_Info_Period5_1',\n",
       " u'ThirdParty_Info_Period5_2',\n",
       " u'ThirdParty_Info_Period5_3',\n",
       " u'ThirdParty_Info_Period5_4',\n",
       " u'ThirdParty_Info_Period5_5',\n",
       " u'ThirdParty_Info_Period5_6',\n",
       " u'ThirdParty_Info_Period5_7',\n",
       " u'ThirdParty_Info_Period5_8',\n",
       " u'ThirdParty_Info_Period5_9',\n",
       " u'ThirdParty_Info_Period5_10',\n",
       " u'ThirdParty_Info_Period5_11',\n",
       " u'ThirdParty_Info_Period5_12',\n",
       " u'ThirdParty_Info_Period5_13',\n",
       " u'ThirdParty_Info_Period5_14',\n",
       " u'ThirdParty_Info_Period5_15',\n",
       " u'ThirdParty_Info_Period5_16',\n",
       " u'ThirdParty_Info_Period5_17',\n",
       " u'ThirdParty_Info_Period6_1',\n",
       " u'ThirdParty_Info_Period6_2',\n",
       " u'ThirdParty_Info_Period6_3',\n",
       " u'ThirdParty_Info_Period6_4',\n",
       " u'ThirdParty_Info_Period6_5',\n",
       " u'ThirdParty_Info_Period6_6',\n",
       " u'ThirdParty_Info_Period6_7',\n",
       " u'ThirdParty_Info_Period6_8',\n",
       " u'ThirdParty_Info_Period6_9',\n",
       " u'ThirdParty_Info_Period6_10',\n",
       " u'ThirdParty_Info_Period6_11',\n",
       " u'ThirdParty_Info_Period6_12',\n",
       " u'ThirdParty_Info_Period6_13',\n",
       " u'ThirdParty_Info_Period6_14',\n",
       " u'ThirdParty_Info_Period6_15',\n",
       " u'ThirdParty_Info_Period6_16',\n",
       " u'ThirdParty_Info_Period6_17',\n",
       " u'ThirdParty_Info_Period7_1',\n",
       " u'ThirdParty_Info_Period7_2',\n",
       " u'ThirdParty_Info_Period7_3',\n",
       " u'ThirdParty_Info_Period7_4',\n",
       " u'ThirdParty_Info_Period7_5',\n",
       " u'ThirdParty_Info_Period7_6',\n",
       " u'ThirdParty_Info_Period7_7',\n",
       " u'ThirdParty_Info_Period7_8',\n",
       " u'ThirdParty_Info_Period7_9',\n",
       " u'ThirdParty_Info_Period7_10',\n",
       " u'ThirdParty_Info_Period7_11',\n",
       " u'ThirdParty_Info_Period7_12',\n",
       " u'ThirdParty_Info_Period7_13',\n",
       " u'ThirdParty_Info_Period7_14',\n",
       " u'ThirdParty_Info_Period7_15',\n",
       " u'ThirdParty_Info_Period7_16',\n",
       " u'ThirdParty_Info_Period7_17',\n",
       " u'SocialNetwork_1',\n",
       " u'SocialNetwork_2',\n",
       " u'SocialNetwork_3',\n",
       " u'SocialNetwork_4',\n",
       " u'SocialNetwork_5',\n",
       " u'SocialNetwork_6',\n",
       " u'SocialNetwork_7',\n",
       " u'SocialNetwork_8',\n",
       " u'SocialNetwork_9',\n",
       " u'SocialNetwork_10',\n",
       " u'SocialNetwork_11',\n",
       " u'SocialNetwork_12',\n",
       " u'SocialNetwork_13',\n",
       " u'SocialNetwork_14',\n",
       " u'SocialNetwork_15',\n",
       " u'SocialNetwork_16',\n",
       " u'SocialNetwork_17',\n",
       " u'target',\n",
       " u'ListingInfo']"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "list(train.columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Idx</th>\n",
       "      <th>UserInfo_1</th>\n",
       "      <th>UserInfo_2</th>\n",
       "      <th>UserInfo_3</th>\n",
       "      <th>UserInfo_4</th>\n",
       "      <th>WeblogInfo_1</th>\n",
       "      <th>WeblogInfo_2</th>\n",
       "      <th>WeblogInfo_3</th>\n",
       "      <th>WeblogInfo_4</th>\n",
       "      <th>WeblogInfo_5</th>\n",
       "      <th>...</th>\n",
       "      <th>SocialNetwork_10</th>\n",
       "      <th>SocialNetwork_11</th>\n",
       "      <th>SocialNetwork_12</th>\n",
       "      <th>SocialNetwork_13</th>\n",
       "      <th>SocialNetwork_14</th>\n",
       "      <th>SocialNetwork_15</th>\n",
       "      <th>SocialNetwork_16</th>\n",
       "      <th>SocialNetwork_17</th>\n",
       "      <th>target</th>\n",
       "      <th>ListingInfo</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10001</td>\n",
       "      <td>1.0</td>\n",
       "      <td>深圳</td>\n",
       "      <td>4.0</td>\n",
       "      <td>深圳</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>222</td>\n",
       "      <td>-1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>2014/3/5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10002</td>\n",
       "      <td>1.0</td>\n",
       "      <td>温州</td>\n",
       "      <td>4.0</td>\n",
       "      <td>温州</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>-1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>2014/2/26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>10003</td>\n",
       "      <td>1.0</td>\n",
       "      <td>宜昌</td>\n",
       "      <td>3.0</td>\n",
       "      <td>宜昌</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2014/2/28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10006</td>\n",
       "      <td>4.0</td>\n",
       "      <td>南平</td>\n",
       "      <td>1.0</td>\n",
       "      <td>南平</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2014/2/25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10007</td>\n",
       "      <td>5.0</td>\n",
       "      <td>辽阳</td>\n",
       "      <td>1.0</td>\n",
       "      <td>辽阳</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2014/2/27</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 228 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Idx  UserInfo_1 UserInfo_2  UserInfo_3 UserInfo_4  WeblogInfo_1  \\\n",
       "0  10001         1.0         深圳         4.0         深圳           NaN   \n",
       "1  10002         1.0         温州         4.0         温州           NaN   \n",
       "2  10003         1.0         宜昌         3.0         宜昌           NaN   \n",
       "3  10006         4.0         南平         1.0         南平           NaN   \n",
       "4  10007         5.0         辽阳         1.0         辽阳           NaN   \n",
       "\n",
       "   WeblogInfo_2  WeblogInfo_3  WeblogInfo_4  WeblogInfo_5     ...       \\\n",
       "0           1.0           NaN           1.0           1.0     ...        \n",
       "1           0.0           NaN           1.0           1.0     ...        \n",
       "2           0.0           NaN           2.0           2.0     ...        \n",
       "3           NaN           NaN           NaN           NaN     ...        \n",
       "4           0.0           NaN           1.0           1.0     ...        \n",
       "\n",
       "   SocialNetwork_10  SocialNetwork_11  SocialNetwork_12  SocialNetwork_13  \\\n",
       "0               222                -1                 0                 0   \n",
       "1                 1                -1                 0                 0   \n",
       "2                -1                -1                -1                 1   \n",
       "3                -1                -1                -1                 0   \n",
       "4                -1                -1                -1                 0   \n",
       "\n",
       "   SocialNetwork_14  SocialNetwork_15  SocialNetwork_16  SocialNetwork_17  \\\n",
       "0                 0                 0                 0                 1   \n",
       "1                 0                 0                 0                 2   \n",
       "2                 0                 0                 0                 0   \n",
       "3                 0                 0                 0                 0   \n",
       "4                 0                 0                 0                 0   \n",
       "\n",
       "   target  ListingInfo  \n",
       "0       0     2014/3/5  \n",
       "1       0    2014/2/26  \n",
       "2       0    2014/2/28  \n",
       "3       0    2014/2/25  \n",
       "4       0    2014/2/27  \n",
       "\n",
       "[5 rows x 228 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
